# Replication script for Box-Steffensmeier and Moses, Science Advances, 2021
#The script that follows replicates all published results and figures.
#Data objects include the LDA model, coherence values and VADER scores.

#    - Run Regressions in Paper
#    - Re-Create Figures
#    - Supplementary Materials Analysis, Tables and Figures
import pandas as pd
import scipy.special as sc
import numpy as np
import re
import pickle
import os
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

from stargazer.stargazer import Stargazer

from sklearn import metrics
from pylab import rcParams
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from collections import defaultdict
from textwrap import wrap

from statsmodels.graphics.regressionplots import *
from sklearn.metrics import cohen_kappa_score
from nltk import agreement

from sklearn import preprocessing
# Enable inline figures when running under IPython/Jupyter. A plain `python`
# interpreter does not define get_ipython, so the magic is skipped there
# instead of aborting the whole script with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

#sns.set(style='whitegrid', palette='muted', font_scale=1.2)
from gensim.test.utils import common_corpus, common_dictionary, datapath
from gensim import corpora
from gensim import models
# Fixed seed for NumPy's global random state.
RANDOM_SEED = 614
np.random.seed(seed=RANDOM_SEED)

# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

###define functions###
def coef_df(results, variable_list):
    '''
    Build a coefficient table from a fitted model's summary.

    Reads the second table of ``results.summary()`` (the coefficient table),
    converts every cell to float, and adds two columns:

    - ``errors``: ``coef`` minus the ``[0.025`` column, i.e. the distance
      from the estimate to the lower confidence bound shown in the summary.
    - ``variables``: the display labels supplied in ``variable_list``.

    The row labelled ``const`` is dropped, so a model whose summary has no
    ``const`` row raises ``KeyError``. ``variable_list`` needs one label per
    remaining row, in the order the rows appear in the summary.

    Returns the resulting DataFrame, indexed by the summary's row labels.
    '''
    # Raw summary cells; the first row holds the column headers.
    table = pd.DataFrame(results.summary().tables[1].data)
    table.columns = table.iloc[0]
    table = table.drop(0)

    # The first column carries the term names; the rest is numeric text.
    table = table.set_index(table.columns[0]).astype(float)

    # Error-bar length, measured from the lower bound up to the estimate.
    table['errors'] = table['coef'] - table['[0.025']

    # The intercept is not plotted.
    table = table.drop(['const'])

    table['variables'] = variable_list
    return table

def influence_outlier_regressions(ols_model, data, rate, outcome_str, covariates=None):
    '''
    Refit an OLS regression after dropping high-influence observations.

    An observation is dropped when its Cook's distance (taken from the
    influence summary of ``ols_model``) exceeds ``rate`` times the median
    Cook's distance.

    Parameters
    ----------
    ols_model : fitted results object exposing ``get_influence()``.
    data : DataFrame holding the observations, one row per fitted
        observation and in the same order as they were fitted.
    rate : multiplier applied to the median Cook's distance to get the cutoff.
    outcome_str : name of the outcome column in ``data``.
    covariates : optional list of regressor column names in ``data``.
        When omitted, the module-level ``covariates_engagement`` list is used.

    Returns
    -------
    The refitted OLS results, estimated with HC3 robust standard errors.
    '''
    if covariates is None:
        # Backward-compatible default: the script-wide covariate list.
        covariates = covariates_engagement

    cooks_d = ols_model.get_influence().summary_frame().loc[:, "cooks_d"]

    # Flag influential rows. The mask is converted to a plain array so it is
    # applied by position; a boolean Series would be aligned on index labels
    # and fail if the influence frame and `data` are labelled differently.
    is_outlier = np.asarray(cooks_d > (cooks_d.median() * rate))

    kept = data[~is_outlier]

    kept_y = kept[outcome_str]
    kept_x = np.asarray(kept[covariates])

    refit = sm.OLS(kept_y, kept_x)
    return refit.fit(cov_type="HC3")  # robust SEs

# ### Data With Spanish Language Posts Removed
# - N is 11999 without
df = pd.read_csv("allposts.csv", index_col=0, header=0)


# ### VADER Compound Score Distribution
# Histogram of the compound score, one set of dodged bars per Party_fin value.
party_palette = sns.color_palette(['blue', 'red'])
vader_compound_dist = sns.displot(
    data=df,
    x="vader_compound",
    hue="Party_fin",
    multiple="dodge",
    common_norm=True,
    palette=party_palette,
)
vader_compound_dist.set(xlabel="sentiment score", ylabel="count")

# Legend title, and let the legend take part in layout calculations.
dist_legend = vader_compound_dist._legend
dist_legend.set_title("Party")
dist_legend.set_in_layout(True)

# Final figure size: width 14, height 5.
dist_fig = vader_compound_dist.fig
dist_fig.set_figwidth(14)
dist_fig.set_figheight(5)

# Bare expression: displays the figure when run as a notebook cell.
vader_compound_dist.fig

#vader_compound_dist.fig.savefig("VADER_Compound.png", dpi=600, bbox_inches='tight')


# ## Time Series Plot:
## Create DF of time-relevant variables.
# .copy() gives an independent frame, so adding the parsed date column does
# not write into a slice of df (which triggers SettingWithCopyWarning).
time_df_vader_compound = df[["date_notime2", "vader_compound", "Party_fin"]].copy()
time_df_vader_compound["date_use2"]=pd.to_datetime(time_df_vader_compound['date_notime2'], format='%Y-%m-%d')

# Daily mean sentiment for each party. Only the numeric score column is
# resampled: taking the mean of the string columns raises TypeError on
# pandas >= 2.0, and older versions silently dropped them anyway.
dem_time_vader_compound=time_df_vader_compound[time_df_vader_compound["Party_fin"]=="Democrat"]
dem_time_vader_compound=dem_time_vader_compound.set_index('date_use2')
dem_means_vader_compound=dem_time_vader_compound[["vader_compound"]].resample('D').mean()

rep_time_vader_compound=time_df_vader_compound[time_df_vader_compound["Party_fin"]=="Republican"]
rep_time_vader_compound=rep_time_vader_compound.set_index('date_use2')
r_means_vader_compound=rep_time_vader_compound[["vader_compound"]].resample('D').mean()

# Stack both series; `dataset` distinguishes them (set1 = Democrat means,
# set2 = Republican means) and drives the line colour.
concatenated_vader_compound = pd.concat([dem_means_vader_compound.assign(dataset='set1'),
                                    r_means_vader_compound.assign(dataset='set2')])

ts_plot_vader_compound=sns.lineplot(x="date_use2", y="vader_compound",
             hue="dataset",
             data=concatenated_vader_compound, palette=["blue", 'red'])

#ts_plot_vader_compound._legend.set_title("Party")
# Vertical reference lines for the two annotated dates.
ts_plot_vader_compound.axvline(pd.to_datetime('2020-03-27'), color="grey", linestyle='--', lw=1) #cares act
ts_plot_vader_compound.axvline(pd.to_datetime('2020-05-28'), color='grey', linestyle='--', lw=1) #100000 dead

ts_plot_vader_compound.legend_.remove()
ts_plot_vader_compound.set(ylim=(-.6,1), xlim=('2020-02-29', '2020-10-30'),
                          xlabel="", ylabel='Average Sentiment Score')

ts_plot_vader_compound.set_xticklabels(['March','April','May','June','July','Aug.','Sept.','Oct.'], rotation=35)

ts_plot_vader_compound.annotate("CARES Act", xy=('2020-03-27', 1), xytext=('2020-03-28', -.3), rotation=49)
ts_plot_vader_compound.annotate("100k Deaths", xy=('2020-05-30', 1), xytext=('2020-05-30', -.53), rotation=45)

ts_plot_vader_compound.figure.set_figheight(7)
ts_plot_vader_compound.figure.set_figwidth(15)

#ts_plot_vader_compound.figure.savefig("time_series_compound.png", dpi=600, bbox_inches='tight')


# ### Regression Analysis
# #### Regression on Engagements
df_eng = pd.read_csv("eng.csv", index_col=0, header=0, dtype="float")

# Regressor columns used by the models in this script.
covariates_engagement = [
    "constant",
    "not_safe_seat_5points",
    "nominate_dim1",
    "Leadership",
    "vader_compound",
]

y_eng = df_eng["outcome_eng"]
x_eng = np.asarray(df_eng[covariates_engagement])

### Engagement OLS, fitted with HC3 robust standard errors
eng_model = sm.OLS(endog=y_eng, exog=x_eng)
eng_model_fit = eng_model.fit(cov_type="HC3")

# #### Regression on Shares
df_shr=pd.read_csv("shr.csv", header=0, index_col=0, dtype="float")

y_shr=df_shr["outcome_share"]

# Regressor columns for the shares model (same columns as the engagement model).
covariates=["constant","not_safe_seat_5points","nominate_dim1","Leadership",
             "vader_compound"]

# Use the list declared just above rather than reaching for the engagement
# section's list, so this section reads and runs on its own terms.
x_shr=np.asarray(df_shr[covariates])

###Share OLS
shr_model=sm.OLS(y_shr,x_shr)

shr_model_fit=shr_model.fit(cov_type="HC3") #robust SES

# Display labels for the four non-intercept regressors, in model order.
varlist=["Swing District", "Ideology", "Leadership", "Tone"]

# Coefficient tables (intercept row removed) for the two robust-SE models.
all_engagement_coefs=coef_df(eng_model_fit,varlist)

shares_coefs=coef_df(shr_model_fit,varlist)

fig, ax = plt.subplots(figsize=(15, 10))
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old names; use whichever name this installation provides.
ticks_style = 'seaborn-ticks' if 'seaborn-ticks' in plt.style.available else 'seaborn-v0_8-ticks'
plt.style.use(ticks_style)

# Engagement: transparent bars exist only to carry the error whiskers
# (the `errors` column); increase capsize for wider whisker caps.
all_engagement_coefs.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='slategrey',capsize=5,
                 yerr='errors', legend=False)

# Engagement point estimates
ax.scatter(x=np.arange(all_engagement_coefs.shape[0]),
               marker='o', s=250,
               y=all_engagement_coefs['coef'], color='slategrey')

# Line to define zero on the y-axis
ax.axhline(y=0, linestyle='--', color='dimgrey', linewidth=1, label="_nolegend_")

# Shares, overlaid on the same axes.
shares_coefs.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='slategrey',capsize=5,
                 yerr='errors', legend=False)

# Shares point estimates
ax.scatter(x=np.arange(shares_coefs.shape[0]),
               marker='v', s=250,
               y=shares_coefs['coef'], color='slategrey')

# Title & labels: set once, after both overlays (the figure has no title).
plt.title('',fontsize=40)
ax.set_ylabel('Coefficients',fontsize=22)
ax.set_xlabel('',fontsize=22)

plt.legend(('Engagement', "Shares"),fontsize=15, bbox_to_anchor=(1.04,1), loc="upper left")
plt.savefig("Engagement_Shares_Coefs_robustSES.png",dpi=600, bbox_inches='tight')

# #### Regressions on Emotional Reactions
def _fit_reaction_model(csv_path, outcome_col):
    """Load one reaction CSV and regress its outcome on covariates_engagement.

    Returns ``(frame, y, x, model, robust_fit)``; ``robust_fit`` is the OLS
    fit with HC3 robust standard errors.
    """
    frame = pd.read_csv(csv_path, header=0, index_col=0, dtype="float")
    y = frame[outcome_col]
    x = np.asarray(frame[covariates_engagement])
    model = sm.OLS(y, x)
    return frame, y, x, model, model.fit(cov_type="HC3")


## Love
df_love, y_love, x_love, love_model, love_model_fit = _fit_reaction_model(
    "love_reacts.csv", "outcome_love")

## Sad
df_sad, y_sad, x_sad, sad_model, sad_model_fit = _fit_reaction_model(
    "sad_reacts.csv", "outcome_sad")

## Wow
df_wow, y_wow, x_wow, wow_model, wow_model_fit = _fit_reaction_model(
    "wow_reacts.csv", "outcome_wow")

## Angry
df_angry, y_angry, x_angry, angry_model, angry_model_fit = _fit_reaction_model(
    "angry_reacts.csv", "outcome_angry")

## Care
df_care, y_care, x_care, care_model, care_model_fit = _fit_reaction_model(
    "care_reacts.csv", "outcome_care")

## Haha
df_haha, y_haha, x_haha, haha_model, haha_model_fit = _fit_reaction_model(
    "haha_reacts.csv", "outcome_haha")

# #### Figure on Emotional Reactions
# Coefficient tables for the six robust-SE reaction models.
angr_coef_df=coef_df(angry_model_fit,varlist)
care_coef_df=coef_df(care_model_fit,varlist)
haha_coef_df=coef_df(haha_model_fit,varlist)
love_coef_df=coef_df(love_model_fit,varlist)
sads_coef_df=coef_df(sad_model_fit,varlist)
wow_coef_df=coef_df(wow_model_fit,varlist)

fig, ax = plt.subplots(figsize=(15, 10))
#plt.style.use('seaborn-ticks')

# One overlay per reaction, in the same order as the legend labels below:
# transparent bars that only carry the error whiskers, then a marker at each
# coefficient.
for reaction_coefs, reaction_marker in (
        (angr_coef_df, 'o'),   # Anger
        (care_coef_df, 'v'),   # Care
        (haha_coef_df, 's'),   # Haha
        (love_coef_df, '*'),   # Love
        (sads_coef_df, 'd'),   # Sad
        (wow_coef_df, 'x'),    # Wow
):
    reaction_coefs.plot(x='variables', y='coef', kind='bar',
                        ax=ax, color='None', fontsize=22,
                        capsize=10,
                        yerr='errors', legend=False)
    ax.scatter(x=np.arange(reaction_coefs.shape[0]),
               y=reaction_coefs['coef'],
               marker=reaction_marker, s=250, color='slategrey')

# Title & labels
plt.title('',fontsize=30)
ax.set_ylabel('Coefficients',fontsize=22)
ax.set_xlabel('',fontsize=22)

# Dashed reference line at zero on the y-axis
ax.axhline(y=0, linestyle='--', color='dimgrey', linewidth=1, label="_nolegend_")

plt.legend(('Anger', "Care", "Haha", "Love", "Sad", "Wow"),
           fontsize=15, bbox_to_anchor=(1.04,1), loc="upper left")

#plt.savefig("Emotional_Reactions_Coefs_robustSES.png", dpi=600, bbox_inches='tight')


# # LDA Figures and Analysis

# Load the saved LDA model. The file imports `models` from gensim but never
# binds the name `gensim` itself, so LdaModel is reached through `models`.
lda_mod=models.LdaModel.load("ldamodel/LDA_Model.gensim")

# Top 15 words per topic as (word, weight) pairs; shown when run in a notebook.
lda_mod.show_topics(num_words=15,formatted=False)
# ### Topic and Tone:
vader_topics_lda = pd.read_csv("lda_vader.csv")
party_sentiment_topic = vader_topics_lda.reset_index()

# Bar per topic_max value and party; bar height is seaborn's default
# estimate of vader_compound.
party_colors = {"Democrat": 'blue', "Republican": 'red'}
sent_lda = sns.barplot(
    x="topic_max",
    y="vader_compound",
    hue="Party_fin",
    data=party_sentiment_topic,
    palette=party_colors,
)

sent_lda.legend(title='Party', bbox_to_anchor=(.65,1.35), frameon=False)
sent_lda.set(xlabel="", ylabel="Average Sentiment Score")

# Topic names, positionally matched to the bars on the x-axis.
sent_topic_labels = [
    "Need for Fed. Support", "CARES Act", "Vaccine & Testing",
    "States & Spread", "Fed. Heath Care Protection",
    "Communities & Front Lines", "Family & Community", "Postal Services",
    "Soical Distancing", "Administration", "Business & Economy", "America",
    "Federal Response & Hearings", "Resources & Information Sharing",
    "Politics",
]
sent_lda.set_xticklabels(sent_topic_labels, ha="right", rotation=45)

sent_lda.figure.savefig("Sent_Topics_Party.png", bbox_inches='tight', dpi=600)

# ### Topic and Frequency
party_freq_topic = pd.read_csv('lda_freq.csv', index_col=0)

# Grouped bars, one colour per column of the frequency table.
freq_lda = party_freq_topic.plot.bar(color=["blue", "red"])

freq_lda.set(xlabel="", ylabel="Count")
freq_lda.legend(title='', bbox_to_anchor=(1.4, .5), loc='center right')
#freq_lda.figure.set_figwidth(6)
#freq_lda.figure.set_figheight(4)

# Topic names, positionally matched to the bars on the x-axis.
freq_topic_labels = [
    "Need for Fed. Support", "CARES Act", "Vaccine & Testing",
    "States & Spread", "Fed. Heath Care Protection",
    "Communities & Front Lines", "Family & Community", "Postal Services",
    "Soical Distancing", "Trump Administration", "Business & Economy",
    "America", "Federal Response & Hearings",
    "Resources & Information Sharing", "Politics",
]
freq_lda.set_xticklabels(freq_topic_labels, ha="right", rotation=45)

# The legend created above is removed again, so the figure has none.
freq_lda.legend_.remove()
#freq_lda.figure.savefig("Freq_Topics_Party_abovelng.png",bbox_inches='tight', dpi=600)

# # Supplement Table and Figures

# ## Frequency of Total COVID Posts by MCs

### Prevalent COVID Posters:
# Posts per page: count of non-missing "Shares" values within each "Page Name".
covid_posters=df.groupby(by=["Page Name"]).agg(
    count_posts=pd.NamedAgg(column="Shares", aggfunc='count'))

# ### Hist Plot of Posts by MCs
# Bind the axes to hist_MCS, the name every later line uses for this plot.
hist_MCS=sns.histplot(x="count_posts", kde=True, data=covid_posters, color="grey")
hist_MCS.set(xlabel="N Posts")

#hist_MCS.figure.savefig("MC_Counts_Histplot.png", dpi=600)

# ## Correlation of DVs and of IVs Used in Regression:
# Pairwise correlations between the outcome variables.
dv_corr=df[["Shares", "engagements_ln", "Love", "Wow", "Haha", "Sad", "Angry","Care"]].corr()

# Upper-triangle mask with the same shape as dv_corr, so it has to be built
# after the matrix. The builtin bool replaces np.bool, which NumPy 1.24 removed.
# NOTE(review): the mask is not passed to the heatmap below, so the full
# matrix is drawn; confirm whether mask=mask_ut was intended.
mask_ut=np.triu(np.ones(dv_corr.shape)).astype(bool) #mask

ax = sns.heatmap(
    dv_corr,
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)
ax.set_xticklabels(['Shares','Total Engagments','Love','Wow', 'Haha', 'Sad', 'Angry', 'Care'],
    rotation=45,
    horizontalalignment='right')
ax.set_yticklabels(['Shares','Total Engagments','Love','Wow', 'Haha', 'Sad', 'Angry', 'Care'])
#ax.get_figure().savefig("dv_correlation.png",dpi=600, bbox_inches='tight')

## Correlation plot: check correlation of the IVs in the engagement data.
iv_columns = [
    'not_safe_seat_5points',
    'nominate_dim1',
    'Leadership',
    'vader_compound',
]
corr = df_eng[iv_columns].corr()

# Diverging colour map centred on zero, with the scale fixed to [-1, 1].
iv_cmap = sns.diverging_palette(20, 220, n=200)
ax = sns.heatmap(corr, vmin=-1, vmax=1, center=0, cmap=iv_cmap, square=True)

# Display names, in the same order as iv_columns.
iv_labels = ['Swing District','DW-NOM','Leadership','Tone']
ax.set_xticklabels(iv_labels, rotation=45, horizontalalignment='right')
ax.set_yticklabels(iv_labels)

#ax.get_figure().savefig("covariate_correlation.png", dpi=600, bbox_inches='tight')

# ## Histograms of DVs and Key IVs
# Transform the compound score: map x to (x + 1) / 2, then take the logit.
rescaled_compound = (df.vader_compound + 1) / 2
new_vcompound = sc.logit(rescaled_compound)

# Histogram (with KDE curve) of the transformed scores.
hist_vader_compound = sns.histplot(new_vcompound, kde=True, color="grey")
hist_vader_compound.set(
    xlabel="Transformed Sentiment Scores",
    ylabel="Count",
)

#hist_vader_compound.figure.savefig("transformed_VADER_hist.png", dpi=600)

# Engagement regression re-run with the alternative tone measure
# (vader_newcompound) in place of vader_compound.
df_newtone=pd.read_csv("eng_altVADER.csv", header=0, index_col=0)

y_newt=df_newtone["outcome_eng"]

new_cov=["constant","not_safe_seat_5points","nominate_dim1","Leadership",
             "vader_newcompound"]

x_newt=np.asarray(df_newtone[new_cov])

newt_model=sm.OLS(y_newt,x_newt)

newt_model_fit=newt_model.fit(cov_type="HC3") #robust SES

# Coefficient table for the alternative-tone model; this name was used below
# without ever being assigned.
# NOTE(review): built with the same labels (varlist) as the main engagement
# table — confirm this matches the table used for the published comparison.
newton_eng_coef=coef_df(newt_model_fit,varlist)

# Rows from position 3 onward (the last, "Tone", row of each four-row table):
# alternative-tone estimate first, original estimate second.
new_old_tone=pd.concat([newton_eng_coef[3:],
all_engagement_coefs[3:]]).reset_index()

# ## Regression Tables for Results in Paper:
# The models were fitted on plain arrays, so their terms carry the generic
# names const, x1..x4; map those onto the labels shown in the tables.
covariate_labels = {
    "const": "constant",
    "x1": "Swing District",
    "x2": "Ideology",
    "x3": "Leadership",
    "x4": "Tone",
}

# ### Engagement and Shares:
share_eng_model_tabs = Stargazer([eng_model_fit, shr_model_fit])
share_eng_model_tabs.rename_covariates(dict(covariate_labels))
share_eng_model_tabs.significant_digits(2)

# ### Emotional Reactions
emotion_fits = [
    angry_model_fit,
    care_model_fit,
    haha_model_fit,
    love_model_fit,
    sad_model_fit,
    wow_model_fit,
]
emotion_models_tabs = Stargazer(emotion_fits)
emotion_models_tabs.rename_covariates(dict(covariate_labels))
emotion_models_tabs.significant_digits(2)

# ## Regression Model Figures for Non-Adjusted SES:

# Refit every model with the default (non-robust) covariance estimator.
eng_model_nres = eng_model.fit()
shr_model_nres = shr_model.fit()

angry_model_nres = angry_model.fit()
care_model_nres = care_model.fit()
haha_model_nres = haha_model.fit()
love_model_nres = love_model.fit()
sad_model_nres = sad_model.fit()
wow_model_nres = wow_model.fit()

# One table holding all eight non-robust fits, in column order.
nres_fits = [
    eng_model_nres,
    shr_model_nres,
    angry_model_nres,
    care_model_nres,
    haha_model_nres,
    love_model_nres,
    sad_model_nres,
    wow_model_nres,
]
models_tabs_nres = Stargazer(nres_fits)

# Replace the generic term names (const, x1..x4) with display labels.
nres_labels = {
    "const": "constant",
    "x1": "Swing District",
    "x2": "Ideology",
    "x3": "Leadership",
    "x4": "Tone",
}
models_tabs_nres.rename_covariates(nres_labels)

models_tabs_nres.significant_digits(2)

# ### Emotional Reactions-NRES
# Coefficient tables for the six non-robust reaction fits.
angr_coef_nres=coef_df(angry_model_nres,varlist)
care_coef_nres=coef_df(care_model_nres,varlist)
haha_coef_nres=coef_df(haha_model_nres,varlist)
love_coef_nres=coef_df(love_model_nres,varlist)
sads_coef_nres=coef_df(sad_model_nres,varlist)
wow_coef_nres=coef_df(wow_model_nres,varlist)

fig, ax = plt.subplots(figsize=(15, 10))

# One overlay per reaction, in the same order as the legend labels below:
# transparent bars that only carry the error whiskers, then a marker at each
# coefficient.
for nres_coefs, nres_marker in (
        (angr_coef_nres, 'o'),   # Anger
        (care_coef_nres, 'v'),   # Care
        (haha_coef_nres, 's'),   # Haha
        (love_coef_nres, '*'),   # Love
        (sads_coef_nres, 'd'),   # Sad
        (wow_coef_nres, 'x'),    # Wow
):
    nres_coefs.plot(x='variables', y='coef', kind='bar',
                    ax=ax, color='None', fontsize=22,
                    capsize=10,
                    yerr='errors', legend=False)
    ax.scatter(x=np.arange(nres_coefs.shape[0]),
               y=nres_coefs['coef'],
               marker=nres_marker, s=250, color='slategrey')

# Title & labels
plt.title('',fontsize=30)
ax.set_ylabel('Coefficients',fontsize=22)
ax.set_xlabel('',fontsize=22)

# Dashed reference line at zero on the y-axis
ax.axhline(y=0, linestyle='--', color='dimgrey', linewidth=1, label="_nolegend_")

plt.legend(('Anger', "Care", "Haha", "Love", "Sad", "Wow"),
           fontsize=15, bbox_to_anchor=(1.04,1), loc="upper left")
plt.savefig("Emotional_Reactions_Coefs_NoRES.png",dpi=600, bbox_inches='tight')

### Engagement and Shares
# Coefficient tables (intercept row removed) for the two non-robust fits.
all_engagement_coefs_nres=coef_df(eng_model_nres,varlist)

shares_coefs_nres=coef_df(shr_model_nres,varlist)

fig, ax = plt.subplots(figsize=(15, 10))
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old names; use whichever name this installation provides.
ticks_style = 'seaborn-ticks' if 'seaborn-ticks' in plt.style.available else 'seaborn-v0_8-ticks'
plt.style.use(ticks_style)

# Engagement: transparent bars exist only to carry the error whiskers
# (the `errors` column); increase capsize for wider whisker caps.
all_engagement_coefs_nres.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='slategrey',capsize=5,
                 yerr='errors', legend=False)

# Engagement point estimates
ax.scatter(x=np.arange(all_engagement_coefs_nres.shape[0]),
               marker='o', s=250,
               y=all_engagement_coefs_nres['coef'], color='slategrey')

# Line to define zero on the y-axis
ax.axhline(y=0, linestyle='--', color='dimgrey', linewidth=1, label="_nolegend_")

# Shares, overlaid on the same axes.
shares_coefs_nres.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='slategrey',capsize=5,
                 yerr='errors', legend=False)

# Shares point estimates
ax.scatter(x=np.arange(shares_coefs_nres.shape[0]),
               marker='v', s=250,
               y=shares_coefs_nres['coef'], color='slategrey')

# Title & labels: set once, after both overlays (the figure has no title).
plt.title('',fontsize=40)
ax.set_ylabel('Coefficients',fontsize=22)
ax.set_xlabel('',fontsize=22)

plt.legend(('Engagement', "Shares"),fontsize=15, bbox_to_anchor=(1.04,1), loc="upper left")
#plt.savefig("Engagement_shares_Coefss_NoRES.png",dpi=600,bbox_inches='tight')


# ### Descriptives of DVs
def _dv_hist(frame, column, shade, panel, label, blank_ylabel=True):
    """Draw one outcome histogram with a KDE curve on ``panel``; return its axes.

    ``blank_ylabel`` clears the y-axis label; the first panel of each row
    passes False and keeps the label seaborn assigns.
    """
    hist_ax = sns.histplot(data=frame, x=column, kde=True, color=shade, ax=panel)
    hist_ax.set_xlabel(label)
    if blank_ylabel:
        hist_ax.set_ylabel("")
    hist_ax.set(facecolor="white")
    return hist_ax


# 2 x 4 grid: one panel per dependent variable.
fig, axs = plt.subplots(2, 4, figsize=(15,10))

# Top row
eng_plt = _dv_hist(df_eng, "outcome_eng", "dimgrey", axs[0, 0], "Engagement",
                   blank_ylabel=False)
share_plt = _dv_hist(df_shr, "outcome_share", "grey", axs[0, 1], "Shares")
angry_plt = _dv_hist(df_angry, "outcome_angry", "darkgrey", axs[0, 2], "Angry")
love_plt = _dv_hist(df_love, "outcome_love", "silver", axs[0, 3], "Love")

# Bottom row
wow_plt = _dv_hist(df_wow, "outcome_wow", "silver", axs[1, 0], "Wow",
                   blank_ylabel=False)
haha_plt = _dv_hist(df_haha, "outcome_haha", "darkgrey", axs[1, 1], "Haha")
sad_plt = _dv_hist(df_sad, "outcome_sad", "grey", axs[1, 2], "Sad")
care_plt = _dv_hist(df_care, "outcome_care", "dimgrey", axs[1, 3], "Care")

#fig.savefig("densities_dvs.png",dpi=600, bbox_inches='tight')


# ### Cook's Distance Outliers
# Refit each model after dropping observations whose Cook's distance exceeds
# 3 times the median Cook's distance.
eng_out_mod_fit = influence_outlier_regressions(
    eng_model_fit, data=df_eng, rate=3, outcome_str="outcome_eng")
shares_out_mod_fit = influence_outlier_regressions(
    shr_model_fit, data=df_shr, rate=3, outcome_str="outcome_share")

sad_out_mod_fit = influence_outlier_regressions(
    sad_model_fit, data=df_sad, rate=3, outcome_str="outcome_sad")
love_out_mod_fit = influence_outlier_regressions(
    love_model_fit, data=df_love, rate=3, outcome_str="outcome_love")
angry_out_mod_fit = influence_outlier_regressions(
    angry_model_fit, data=df_angry, rate=3, outcome_str="outcome_angry")
wow_out_mod_fit = influence_outlier_regressions(
    wow_model_fit, data=df_wow, rate=3, outcome_str="outcome_wow")
haha_out_mod_fit = influence_outlier_regressions(
    haha_model_fit, data=df_haha, rate=3, outcome_str="outcome_haha")
care_out_mod_fit = influence_outlier_regressions(
    care_model_fit, data=df_care, rate=3, outcome_str="outcome_care")

# Coefficient tables for the six reaction refits.
angr_coef_df2 = coef_df(angry_out_mod_fit, varlist)
care_coef_df2 = coef_df(care_out_mod_fit, varlist)
haha_coef_df2 = coef_df(haha_out_mod_fit, varlist)
love_coef_df2 = coef_df(love_out_mod_fit, varlist)
sads_coef_df2 = coef_df(sad_out_mod_fit, varlist)
wow_coef_df2 = coef_df(wow_out_mod_fit, varlist)

fig, ax = plt.subplots(figsize=(15, 10))
#plt.style.use('seaborn-ticks')

# One overlay per reaction, in the same order as the legend labels below:
# transparent bars that only carry the error whiskers (increase capsize for
# wider caps), then a marker at each coefficient.
for cooks_coefs, cooks_marker in (
        (angr_coef_df2, 'o'),   # Anger
        (care_coef_df2, 'v'),   # Care
        (haha_coef_df2, 's'),   # Haha
        (love_coef_df2, '*'),   # Love
        (sads_coef_df2, 'd'),   # Sad
        (wow_coef_df2, 'x'),    # Wow
):
    cooks_coefs.plot(x='variables', y='coef', kind='bar',
                     ax=ax, color='None', fontsize=22,
                     capsize=10,
                     yerr='errors', legend=False)
    ax.scatter(x=np.arange(cooks_coefs.shape[0]),
               y=cooks_coefs['coef'],
               marker=cooks_marker, s=250, color='slategrey')

# Title & labels
plt.title('',fontsize=30)
ax.set_ylabel('Coefficients',fontsize=22)
ax.set_xlabel('',fontsize=22)

# Dashed reference line at zero on the y-axis
ax.axhline(y=0, linestyle='--', color='dimgrey', linewidth=1, label="_nolegend_")

plt.legend(('Anger', "Care", "Haha", "Love", "Sad", "Wow"),
           fontsize=15, bbox_to_anchor=(1.04,1), loc="upper left")

#plt.savefig("Emotional_Reactions_Coefs_CooksOutliers.png",dpi=600, bbox_inches='tight')
# Coefficient tables (intercept row removed) for the two outlier-trimmed refits.
all_engagement_coefs2=coef_df(eng_out_mod_fit,varlist)

shares_coefs2=coef_df(shares_out_mod_fit,varlist)

fig, ax = plt.subplots(figsize=(15, 10))
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old names; use whichever name this installation provides.
ticks_style = 'seaborn-ticks' if 'seaborn-ticks' in plt.style.available else 'seaborn-v0_8-ticks'
plt.style.use(ticks_style)

# Engagement: transparent bars exist only to carry the error whiskers
# (the `errors` column); increase capsize for wider whisker caps.
all_engagement_coefs2.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='slategrey',capsize=5,
                 yerr='errors', legend=False)

# Engagement point estimates
ax.scatter(x=np.arange(all_engagement_coefs2.shape[0]),
               marker='o', s=250,
               y=all_engagement_coefs2['coef'], color='slategrey')

# Line to define zero on the y-axis
ax.axhline(y=0, linestyle='--', color='dimgrey', linewidth=1, label="_nolegend_")

# Shares, overlaid on the same axes.
shares_coefs2.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='slategrey',capsize=5,
                 yerr='errors', legend=False)

# Shares point estimates
ax.scatter(x=np.arange(shares_coefs2.shape[0]),
               marker='v', s=250,
               y=shares_coefs2['coef'], color='slategrey')

# Title & labels: set once, after both overlays (the figure has no title).
plt.title('',fontsize=40)
ax.set_ylabel('Coefficients',fontsize=22)
ax.set_xlabel('',fontsize=22)

plt.legend(('Engagement', "Shares"),fontsize=15, bbox_to_anchor=(1.04,1), loc="upper left")
#plt.savefig("Engagement_shares_Coefs_CooksOutliers.png",dpi=600,bbox_inches='tight')


# #### Tables:
# All eight outlier-trimmed refits in one table, in column order.
cooks_fits = [
    eng_out_mod_fit,
    shares_out_mod_fit,
    sad_out_mod_fit,
    love_out_mod_fit,
    angry_out_mod_fit,
    wow_out_mod_fit,
    haha_out_mod_fit,
    care_out_mod_fit,
]
models_tabs_cooks = Stargazer(cooks_fits)

# Replace the generic term names (const, x1..x4) with display labels.
cooks_labels = {
    "const": "constant",
    "x1": "Swing District",
    "x2": "Ideology",
    "x3": "Leadership",
    "x4": "Tone",
}
models_tabs_cooks.rename_covariates(cooks_labels)

models_tabs_cooks.significant_digits(2)
# ### LDA Coherence Scores:
# Load the saved coherence values; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open("lda_coherence.p", "rb") as coherence_file:
    coherence_values=pickle.load(coherence_file)

# Topic counts on the x-axis: 2 through 51, one per coherence value.
x = range(2, 52, 1)
cv_coherence=sns.lineplot(x=x, y=coherence_values, legend='full')
cv_coherence.set(xlabel="N Topics", ylabel="Score")
# y-axis runs from the smallest score to just above the largest one.
cv_coherence.set(ylim=(min(coherence_values), max(coherence_values)+.01),
                 xlim=(.75, 51.25))
#cv_coherence.figure.savefig("lda_coherence.png", dpi=600, bbox_inches="tight")

# # Coder Validation
df_hc = pd.read_csv("df_hc.csv", index_col=0, header=0)

# Both score columns as integers.
nscores = df_hc["n_score"].astype("int")
final_s = df_hc["final_s"].astype('int')

# Confusion matrix (rows: n_score, columns: final_s) and per-class report.
hc_cm = metrics.confusion_matrix(nscores, final_s)
hc_cm
metrics_hc = metrics.classification_report(nscores, final_s, digits=2)

# Same cross-tabulation via pandas; displayed when run in a notebook.
pd.crosstab(df_hc.n_score, df_hc.final_s,
            rownames=['n_score'], colnames=['final_s'])

df_cm = pd.DataFrame(hc_cm)
# plt.figure(figsize=(10,7))
sns.set(font_scale=1.4) # for label size
cm_palette = sns.diverging_palette(20, 220, n=200)
cm_plot = sns.heatmap(df_cm, annot=True, cmap=cm_palette)

# Axis titles
cm_plot.set_xlabel('Human')
cm_plot.set_ylabel('VADER')

# Class names, positionally matched to the matrix rows and columns.
class_labels = ['negative', "neutral", 'positive']
cm_plot.set_xticklabels(class_labels, rotation=35)
cm_plot.set_yticklabels(class_labels, rotation=35)
#cm_plot.figure.savefig("confusion_matrix_hc.png", dpi=600, bbox_inches='tight')
